* Start from an empty session and record which platform the script runs on.
clear all
global system "linux"

* On linux, define the path separator and the code directory that are used
* to locate the path-setup script below.
if ("${system}" == "linux") {
	global s "/"
	global code "/"
}

* Execute the path-setup script without echoing its output.
* NOTE(review): presumably this script defines the cached and temp globals
* used further down in this file -- confirm.
run "${code}${s}_set-path.do"

*****  Prep the real data

** static data
* Load the combined loan-level GSE file (produced by 1c-combine-loanlevel-GSEdata.do).
use "${cached}/embs-GSE-loanlevel-combined.dta", clear


** Sample Selection
* Restrict to the listed product codes.
keep if inlist(embs_product, "FHLG30", "FHLG30HILTV", "FHLG30JM", "FNM30", "FNM30HILTV",  "FNM30JM")
* Exclude HI, AK, VI and GU.
drop if inlist(state_id, "HI", "AK", "VI", "GU")
* Keep only state_ids where CLLs were increased.  The list is split over two
* inlist() calls, presumably because inlist() limits the number of string
* arguments it accepts.
keep if inlist(state_id, "CA", "CO", "CT", "DC", "MA") | inlist(state_id, "MD", "NJ", "NY", "VA", "WA")
keep if orig_term == 360
keep if num_units == 1
* Originations from March 2009 onward.  Stata treats missing as larger than
* any number, so a bare >= comparison would also keep loans whose origination
* month is missing; the explicit guard removes them.
keep if orig_ym >= ym(2009, 3) & !missing(orig_ym)
keep if proptype_id == "SF" // keep only single-family properties


** clean vars
* Run the external variable-cleaning script, then discard observations whose
* loan amount or loan-to-value ratio is system-missing.
do "${code}/_clean-embs-vars.do"
drop if orig_loan_amount == . | orig_loan_to_value == .

// additional sample selection
* Keep observations with Purchase equal to 1 and an LTV of at most 80.
keep if Purchase == 1
keep if orig_loan_to_value <= 80


** Cutoff variables
* Implied home price: loan amount divided by the LTV (expressed in percent).
* Stored as double so that the cutoff comparison and the 5,000-wide bins
* below are not shifted by float rounding of six-digit prices.
gen double homeprice = orig_loan_amount / orig_loan_to_value * 100

* HiP = 1 when the price exceeds cll/.8, i.e. the price at which a loan with
* an LTV of 80 equals cll.  Stata treats missing as larger than any number,
* so without the guard a missing homeprice (e.g. an LTV of 0) would be coded
* 1 and a missing cll would be coded 0; both cases are left missing instead,
* consistent with PaboveC and PaboveC_bin.
gen byte HiP = homeprice > (cll / .8) if !missing(homeprice, cll)

* Distance of the price from the cutoff and its 5,000-wide bin index
* (bin 10 ends at the cutoff, bin 11 starts just above it).
gen PaboveC = homeprice - (cll / .8)
gen PaboveC_bin = ceil((homeprice - (cll / .8)) / 5000) + 10

* Same construction for the loan amount relative to cll.
gen baldiff = orig_loan_amount - cll
gen baldiff_bin = ceil(baldiff / 5000) + 10

* Only bins 1 through 20 are retained; every other bin value is set to missing.
foreach x of varlist PaboveC baldiff {
	replace `x'_bin = . if !inrange(`x'_bin, 1, 20)
}

* Rescale both distance variables to thousands.
replace PaboveC = PaboveC / 1000
replace baldiff = baldiff / 1000


** create other controls
* Indicator equal to 1 when tbaeligcode_id is "Y" and 0 otherwise.
generate tbaelig = (tbaeligcode_id == "Y")

** Create bins for the control variables
* The helper script is run quietly with credit_score and orig_loan_to_value
* passed as its arguments.
quietly do "${code}/tba_elig/_LLPAbins.do" credit_score orig_loan_to_value

* One group id per combination of credit_score_bin and ltv_bin
* (the inputs are created by "_LLPAbins.do").
egen LLPAbin = group(credit_score_bin ltv_bin)


* Write the prepared sample to disk, overwriting any existing file.
save "${temp}/embs-417k.dta", replace
